0.1 Define Parameters

# Paths
path_to_save_obj <- "../results"
path_to_saved_post_QC_obj <- str_c(
  path_to_save_obj,
  "seurat_object_cite_seq_postQC.rds",
  sep = "/"
)
path_to_save_seurat_assess_doublets_obj <- str_c(
  path_to_save_obj,
  "seurat_object_cite_seq_seurat_eval_doublets.rds",
  sep = "/"
)
saved_cell_cycle_obj <- str_c(
  "../data/cycle.rda",
  sep = "/"
)

0.2 Read saved seurat object post QC

filtered_bcll.combined <- readRDS(path_to_saved_post_QC_obj)

1 Normalize

To normalize by sequencing depth, we will divide each count by the library size of the cell (total number of UMI) and log-transform it, similarly to other high-quality atlases, like the thymus and the heart atlas.

filtered_bcll.combined <- NormalizeData(
  filtered_bcll.combined,
  normalization.method = "LogNormalize",
  scale.factor = 1e4
)
filtered_bcll.combined[["RNA"]]@data[1:10, 1:10]
## 10 x 10 sparse Matrix of class "dgCMatrix"
##    [[ suppressing 10 column names 'BCLLATLAS_33_mLuLpVxi_v0fLyotc_AAACCTGCACAGACAG-1', 'BCLLATLAS_33_mLuLpVxi_v0fLyotc_AAACCTGCAGAGCCAA-1', 'BCLLATLAS_33_mLuLpVxi_v0fLyotc_AAACGGGCAAGCCCAC-1' ... ]]
##                                                    
## AL627309.1 .        .        . . . . . . .        .
## AL627309.5 .        .        . . . . . . .        .
## AP006222.2 .        .        . . . . . . .        .
## LINC01409  .        .        . . . . . . .        .
## FAM87B     .        .        . . . . . . .        .
## LINC01128  1.403951 .        . . . . . . .        .
## LINC00115  .        .        . . . . . . .        .
## FAM41C     .        .        . . . . . . .        .
## NOC2L      .        2.030531 . . . . . . 1.117081 .
## KLHL17     .        .        . . . . . . .        .

2 Visualize UMAP without batch effect correction

filtered_bcll.combined <- filtered_bcll.combined %>%
  FindVariableFeatures(nfeatures = 3000) %>%
  ScaleData() %>% 
  RunPCA() %>%
  RunUMAP(reduction = "pca", dims = 1:30)
## Centering and scaling data matrix
## PC_ 1 
## Positive:  STMN1, HMGB2, MKI67, NUSAP1, TOP2A, TUBA1B, HIST1H4C, H2AFZ, HIST1H1B, CENPF 
##     HMGN2, HMGB1, CDK1, TUBB, TUBB4B, KIFC1, KNL1, MCM7, TPX2, CCNB2 
##     BIRC5, HIST1H3B, CCNB1, CDCA8, CENPM, PTTG1, HJURP, CDCA3, NCAPH, SMC4 
## Negative:  MALAT1, IL32, CD3D, FYB1, GIMAP7, GIMAP4, GIMAP5, CCR7, IL7R, SRGN 
##     LCP2, CD3G, S100A4, TCF7, CD2, CD5, ITM2A, PLAAT4, SLFN5, TC2N 
##     CD69, INPP4B, TRBC1, ITK, SIRPG, CD28, GBP2, LAT, AHNAK, LINC00861 
## PC_ 2 
## Positive:  IL32, CD3D, FYB1, GIMAP4, GIMAP7, GIMAP5, CD3G, CD2, LCP2, SRGN 
##     CD5, TCF7, ITM2A, IL7R, CD28, SLFN5, LAT, SIRPG, S100A4, TIGIT 
##     ITK, TC2N, CTLA4, INPP4B, TRBC1, FYN, CD247, GBP2, PLAAT4, CCR7 
## Negative:  CD74, IFI30, CD83, JCHAIN, FCRL3, FCRL5, IGKC, MZB1, LINC00926, SPIB 
##     IGHG1, FCRL2, DERL3, IGHM, IGHG3, FCGR2B, PHACTR1, TNFRSF13B, CTSH, CD1C 
##     ENTPD1, SERPINA9, SIGLEC10, ST6GAL1, LRRK2, CD86, FCRL4, DHRS9, PLCG2, IGLC2 
## PC_ 3 
## Positive:  KIF20A, CENPE, CENPF, HMMR, CCNB2, ASPM, AURKA, CCNB1, KPNA2, TOP2A 
##     DEPDC1, DLGAP5, ARL6IP1, KIF23, KIF14, CDCA8, SGO2, KNSTRN, PSRC1, NEK2 
##     HJURP, BUB1, TPX2, PLK1, CDC20, CKAP2, MALAT1, ECT2, NUF2, POLH 
## Negative:  PCNA, GINS2, MCM6, MCM7, MCM3, MCM5, RRM2, MCM4, CDC6, HELLS 
##     CLSPN, FEN1, ATAD2, DHFR, FABP5, NME2, FAM111B, RFC2, PAICS, DTL 
##     HSPD1, CDC45, HSP90AB1, TK1, MCM10, CCNE2, UHRF1, CHCHD2, RPL8, RFC4 
## PC_ 4 
## Positive:  HIST1H3C, HIST1H1B, HIST1H2AH, HIST1H3B, HIST1H2BF, HIST1H1D, HIST1H4F, HIST1H2BH, HIST1H2BJ, HIST1H1C 
##     HIST1H4D, HIST1H3F, HIST2H2BF, HIST1H2AE, HIST1H2BL, HIST1H2BB, MTRNR2L12, HIST1H2BC, HIST1H1E, HIST1H2BM 
##     HIST1H3H, HIST1H4L, HIST1H4H, HIST1H2AB, HIST1H2BN, HIST1H4C, HIST1H2BI, HIST1H4B, MTRNR2L8, HIST1H3J 
## Negative:  RPL8, CHCHD2, LYZ, NME2, TYROBP, HSPA5, CDC20, FCER1G, LGALS1, PLK1 
##     SERBP1, HSP90B1, GRN, RABAC1, CRIP1, GSN, UBE2C, NANS, CALR, LGALS2 
##     FGL2, PTMA, CTSH, MLF2, PGAM1, SLC25A3, NDUFS7, CPVL, JPT1, ACTB 
## PC_ 5 
## Positive:  ACTB, LYZ, IFI30, IFNGR1, CAPG, FCRL4, MT-CO1, TYROBP, ZEB2, CD74 
##     TNFRSF13B, FGL2, CD83, LGALS2, FCER1G, CCR6, GSN, CD1C, CPVL, LST1 
##     PLAC8, ITGAX, LINC00926, IGSF6, MNDA, HSP90AB1, SPIB, SIGLEC10, FGR, AIF1 
## Negative:  JCHAIN, MZB1, DERL3, IGHG1, SSR4, XBP1, SEC11C, LINC02362, TENT5C, IGKC 
##     HSP90B1, IGHG4, FKBP11, AC012236.1, IGHG3, IGLC2, IGHGP, TXNDC5, SLAMF7, SELENOK 
##     PRDX4, LMAN1, TNFRSF17, IGLC1, IGHG2, RASSF6, IGLC3, PDIA4, HYOU1, HSPA5
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 13:06:30 UMAP embedding parameters a = 0.9922 b = 1.112
## 13:06:30 Read 43986 rows and found 30 numeric columns
## 13:06:30 Using Annoy for neighbor search, n_neighbors = 30
## 13:06:30 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:06:33 Writing NN index file to temp file /tmp/RtmpWrQvXh/fileff389fabb32c
## 13:06:33 Searching Annoy index using 1 thread, search_k = 3000
## 13:06:45 Annoy recall = 100%
## 13:06:45 Commencing smooth kNN distance calibration using 1 thread
## 13:06:47 Initializing from normalized Laplacian + noise
## 13:06:48 Commencing optimization for 200 epochs, with 1981496 positive edges
## 13:07:03 Optimization finished

2.1 Plot UMAP with respect to donor_id

DimPlot(filtered_bcll.combined, group.by = "donor_id", pt.size = 0.1) + NoLegend()

2.2 Plot UMAP with respect to subproject

DimPlot(filtered_bcll.combined, group.by = "subproject", pt.size = 0.1)

2.3 Plot UMAP with respect to gem_id

DimPlot(filtered_bcll.combined, group.by = "gemid", pt.size = 0.1)

2.4 Plot UMAP with respect to seurat

DimPlot(filtered_bcll.combined, group.by = "scrublet_predicted_doublet", pt.size = 0.1)

FeaturePlot(filtered_bcll.combined, features = "scrublet_doublet_scores")

2.5 Check vireo doublets

DimPlot(filtered_bcll.combined, group.by = "genotype_based_doublet_flag", pt.size = 0.1)

2.6 Unassigned to any donor

DimPlot(filtered_bcll.combined, group.by = "genotype_based_unassigned_flag", pt.size = 0.1)

2.7 Check doublet feature scatter

2.8 Doublets defined by scrublet

filtered_bcll.combined_scrublet_doublet = subset(filtered_bcll.combined, subset = scrublet_predicted_doublet == 'True')
p1 <- FeatureScatter(filtered_bcll.combined_scrublet_doublet, feature1 = "CD4", feature2 = "CD8", group.by
 = "donor_id")
## Warning: Could not find CD8 in the default search locations, found in ADT assay
## instead
p2 <- FeatureScatter(filtered_bcll.combined_scrublet_doublet, feature1 = "CD19", feature2 = "CD3E", group.by
 = "donor_id")
p3 <- FeatureScatter(filtered_bcll.combined_scrublet_doublet, feature1 = "CD79B", feature2 = "CD3E", group.by
 = "donor_id")
p1

p2

p3

2.9 Check genotype doublet feature scatter

filtered_bcll.combined_genotype_doublet = subset(filtered_bcll.combined, subset = genotype_based_doublet_flag == 'T')
p1 <- FeatureScatter(filtered_bcll.combined_genotype_doublet, feature1 = "CD4", feature2 = "CD8", group.by
 = "donor_id")
## Warning: Could not find CD8 in the default search locations, found in ADT assay
## instead
p2 <- FeatureScatter(filtered_bcll.combined_genotype_doublet, feature1 = "CD19", feature2 = "CD3E", group.by
 = "donor_id")
p3 <- FeatureScatter(filtered_bcll.combined_genotype_doublet, feature1 = "CD79B", feature2 = "CD3E", group.by
 = "donor_id")
p1

p2

p3

2.10 Check genotype unassigned feature scatter

filtered_bcll.combined_genotype_unassigned = subset(filtered_bcll.combined, subset = genotype_based_unassigned_flag == 'T')
p1 <- FeatureScatter(filtered_bcll.combined_genotype_unassigned, feature1 = "CD4", feature2 = "CD8", group.by
 = "donor_id")
## Warning: Could not find CD8 in the default search locations, found in ADT assay
## instead
p2 <- FeatureScatter(filtered_bcll.combined_genotype_unassigned, feature1 = "CD19", feature2 = "CD3E", group.by
 = "donor_id")
p3 <- FeatureScatter(filtered_bcll.combined_genotype_unassigned, feature1 = "CD79B", feature2 = "CD3E", group.by
 = "donor_id")
p1

p2

p3

2.11 Check for Cell cycle stages variability between clusters

load(saved_cell_cycle_obj)
filtered_bcll.combined <- CellCycleScoring(filtered_bcll.combined, 
                                 g2m.features = g2m_genes, 
                                 s.features = s_genes)

2.12 Cell Cycle

# Plot the PCA colored by cell cycle phase
DimPlot(filtered_bcll.combined,
        reduction = "pca",
        group.by= "Phase",
        split.by = "Phase")

DimPlot(filtered_bcll.combined, group.by = 'Phase', pt.size = 0.1)

2.13 Check cells containing both TCR and BCR

2.14 Plot UMAP with respect to TCR

DimPlot(filtered_bcll.combined, group.by = "tcr_flag", pt.size = 0.1)

2.15 Plot UMAP with respect to BCR

DimPlot(filtered_bcll.combined, group.by = "bcr_flag", pt.size = 0.1)

3 Session Info

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] stringr_1.4.0      scales_1.1.1       ggplot2_3.3.5      dplyr_1.0.8       
## [5] SeuratObject_4.0.4 Seurat_4.1.0       knitr_1.37        
## 
## loaded via a namespace (and not attached):
##   [1] Rtsne_0.15            colorspace_2.0-3      deldir_1.0-6         
##   [4] ellipsis_0.3.2        ggridges_0.5.3        spatstat.data_2.1-2  
##   [7] farver_2.1.0          leiden_0.3.9          listenv_0.8.0        
##  [10] ggrepel_0.9.1         RSpectra_0.16-0       fansi_1.0.2          
##  [13] codetools_0.2-18      splines_4.0.3         polyclip_1.10-0      
##  [16] jsonlite_1.8.0        ica_1.0-2             cluster_2.1.2        
##  [19] png_0.1-7             uwot_0.1.11           shiny_1.7.1          
##  [22] sctransform_0.3.3     spatstat.sparse_2.1-0 compiler_4.0.3       
##  [25] httr_1.4.2            assertthat_0.2.1      Matrix_1.4-0         
##  [28] fastmap_1.1.0         lazyeval_0.2.2        cli_3.2.0            
##  [31] later_1.3.0           htmltools_0.5.2       tools_4.0.3          
##  [34] igraph_1.2.11         gtable_0.3.0          glue_1.6.2           
##  [37] RANN_2.6.1            reshape2_1.4.4        Rcpp_1.0.8.3         
##  [40] scattermore_0.8       jquerylib_0.1.4       vctrs_0.3.8          
##  [43] nlme_3.1-155          lmtest_0.9-39         spatstat.random_2.1-0
##  [46] xfun_0.30             globals_0.14.0        mime_0.12            
##  [49] miniUI_0.1.1.1        lifecycle_1.0.1       irlba_2.3.5          
##  [52] goftest_1.2-3         future_1.24.0         MASS_7.3-55          
##  [55] zoo_1.8-9             spatstat.core_2.4-0   promises_1.2.0.1     
##  [58] spatstat.utils_2.3-0  parallel_4.0.3        RColorBrewer_1.1-2   
##  [61] yaml_2.3.5            reticulate_1.24       pbapply_1.5-0        
##  [64] gridExtra_2.3         sass_0.4.0            rpart_4.1.16         
##  [67] stringi_1.7.6         highr_0.9             rlang_1.0.2          
##  [70] pkgconfig_2.0.3       matrixStats_0.61.0    evaluate_0.15        
##  [73] lattice_0.20-45       ROCR_1.0-11           purrr_0.3.4          
##  [76] tensor_1.5            labeling_0.4.2        patchwork_1.1.1      
##  [79] htmlwidgets_1.5.4     cowplot_1.1.1         tidyselect_1.1.2     
##  [82] parallelly_1.30.0     RcppAnnoy_0.0.19      plyr_1.8.6           
##  [85] magrittr_2.0.2        R6_2.5.1              generics_0.1.2       
##  [88] DBI_1.1.2             withr_2.5.0           mgcv_1.8-39          
##  [91] pillar_1.7.0          fitdistrplus_1.1-8    survival_3.3-1       
##  [94] abind_1.4-5           tibble_3.1.6          future.apply_1.8.1   
##  [97] crayon_1.5.0          KernSmooth_2.23-20    utf8_1.2.2           
## [100] spatstat.geom_2.3-2   plotly_4.10.0         rmarkdown_2.13       
## [103] grid_4.0.3            data.table_1.14.2     digest_0.6.29        
## [106] xtable_1.8-4          tidyr_1.2.0           httpuv_1.6.5         
## [109] munsell_0.5.0         viridisLite_0.4.0     bslib_0.3.1